setwd(“~/Box Sync/CODE_DATA/dir_MAPpaper/R_figures”)
setwd(“~/Box Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX”)
# cys merge of all detected and non detected rows
cmerg <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/POSspecific_Cdet_not_merge.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
))) %>%
mutate(group = factor(group, levels = c(
"Detected",
"Other"
)))
# lys merge
kmerg <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/POSspecific_Kdet_not_merge.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
))) %>%
mutate(group = factor(group, levels = c(
"Detected",
"Other"
)))
summary(cmerg)
## index CADD_avg CADD_max assembly
## A0AVT1_C156: 2 Min. : 0.03643 Min. : 0.116 hg19:41080
## A0AVT1_C17 : 2 1st Qu.:23.15714 1st Qu.:24.800 hg38:41080
## A0AVT1_C174: 2 Median :25.84286 Median :28.200
## A0AVT1_C178: 2 Mean :24.90547 Mean :27.701
## A0AVT1_C197: 2 3rd Qu.:28.14286 3rd Qu.:32.000
## A0AVT1_C231: 2 Max. :35.00000 Max. :59.000
## (Other) :82148
## group
## Detected:12300
## Other :69860
##
##
##
##
##
summary(kmerg)
## index CADD_avg CADD_max assembly
## A0AVT1_K1003: 2 Min. : 0.01914 Min. : 0.059 hg19:154029
## A0AVT1_K1006: 2 1st Qu.:22.64286 1st Qu.:24.300 hg38:154029
## A0AVT1_K1011: 2 Median :24.51429 Median :27.000
## A0AVT1_K1014: 2 Mean :24.11317 Mean :26.966
## A0AVT1_K1019: 2 3rd Qu.:26.54286 3rd Qu.:29.700
## A0AVT1_K1020: 2 Max. :36.28571 Max. :57.000
## (Other) :308046
## group
## Detected: 18004
## Other :290054
##
##
##
##
##
Cm <- cmerg %>%
ggplot(aes(x=assembly, y=CADD_avg, fill=group)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Average missense CADD scores for detected vs non-detected Cysteines") +
ylab("CADD score") +
xlab("Assembly groups") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
Cm
ggsave("Cmerge_det_vs_not_violin.png", width = 7, height=4, dpi= 300)
Cmax <- cmerg %>%
ggplot(aes(x=assembly, y=CADD_max,fill=group)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Max missense CADD scores for detected vs not detected Cysteines") +
ylab("CADD score") +
xlab("Assembly groups") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,10,20,30,40,50,60))
Cmax
ggsave("CmergeMAX_det_vs_not_violin.png", width = 7, height=4, dpi= 300)
Km <- kmerg %>%
ggplot(aes(x=assembly, y=CADD_avg, fill=group)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Average missense CADD scores for detected vs not detected Lysines") +
ylab("CADD score") +
xlab("Assembly groups") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
Km
ggsave("Kmerge_det_vs_not_violin.png", width = 7, height=4, dpi= 300)
Kmax <- kmerg %>%
ggplot(aes(x=assembly, y=CADD_max, fill=group)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Max missense CADD scores for detected vs not detected Lysines") +
ylab("CADD score") +
xlab("Assembly groups") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,10,20,30,40,50,60))
Kmax
ggsave("KmergeMAX_det_vs_not_violin.png", width = 7, height=4, dpi= 300)
Cdet <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/POSspecific_C_detected_maxmean_6150x2pos.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
))) %>%
mutate(Cys_react_threshold = factor(Cys_react_threshold, levels = c(
"Low",
"Medium",
"High"
))) %>%
mutate(Cys_target_label = factor(Cys_target_label, levels = c(
"Target",
"panReactive"
)))
Cnot <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/POSspecific_C_notdetected_maxmean_34930x2pos.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
)))
Kdet <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/POSspecific_K_detected_maxmean_9002x2pos.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
))) %>%
mutate(Lys_react_threshold = factor(Lys_react_threshold, levels = c(
"Low",
"Medium",
"High"
))) %>%
mutate(Lys_target_label = factor(Lys_target_label, levels = c(
"Target",
"panReactive"
)))
Knot <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/POSspecific_K_notdetected_maxmean_145027x2pos.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
)))
summary(Cdet)
## index xref Cys_reactivity Cys_react_threshold
## A0AVT1_C347: 2 P21333 : 50 Min. : 0.740 Low :1858
## A0AVT1_C433: 2 P49327 : 50 1st Qu.: 4.320 Medium: 882
## A0AVT1_C546: 2 Q15149 : 44 Median : 5.575 High : 188
## A0AVT1_C625: 2 P78527 : 40 Mean : 5.927 NA's :9372
## A0AVT1_C721: 2 Q7Z6Z7 : 40 3rd Qu.: 6.830
## A0AVT1_C770: 2 O75369 : 36 Max. :20.000
## (Other) :12288 (Other):12040 NA's :9372
## Cys_target_label geneNamePrimary CADD_avg CADD_max
## Target : 1424 FASN : 50 Min. : 0.06386 Min. : 0.396
## panReactive:10250 FLNA : 50 1st Qu.:22.05679 1st Qu.:23.800
## NA's : 626 PLEC : 44 Median :24.80000 Median :27.100
## HUWE1 : 40 Mean :23.91074 Mean :26.942
## PRKDC : 40 3rd Qu.:27.60000 3rd Qu.:31.000
## DYNC1H1: 36 Max. :34.71429 Max. :59.000
## (Other):12040
## assembly
## hg19:6150
## hg38:6150
##
##
##
##
##
summary(Cnot)
## index xref geneNamePrimary CADD_avg
## A0AVT1_C156: 2 Q8WZ42 : 1024 TTN : 1024 Min. : 0.03643
## A0AVT1_C17 : 2 Q04721 : 484 NOTCH2 : 484 1st Qu.:23.38571
## A0AVT1_C174: 2 P24043 : 326 LAMA2 : 326 Median :26.00000
## A0AVT1_C178: 2 Q15751 : 238 HERC1 : 238 Mean :25.08060
## A0AVT1_C197: 2 Q5T4S7 : 238 UBR4 : 238 3rd Qu.:28.21429
## A0AVT1_C231: 2 Q70CQ2 : 198 : 206 Max. :35.00000
## (Other) :69848 (Other):67352 (Other):67344
## CADD_max assembly
## Min. : 0.116 hg19:34930
## 1st Qu.:25.100 hg38:34930
## Median :28.300
## Mean :27.835
## 3rd Qu.:32.000
## Max. :53.000
##
summary(Kdet)
## index xref Lys_reactivity Lys_react_threshold
## A0AVT1_K1014: 2 Q09666 : 210 Min. : 0.100 Low :6974
## A0AVT1_K409 : 2 P21333 : 100 1st Qu.: 5.800 Medium:1322
## A0AVT1_K544 : 2 O75369 : 94 Median :10.000 High : 608
## A0AVT1_K86 : 2 P35579 : 84 Mean : 7.923 NA's :9100
## A0AVT1_K963 : 2 Q15149 : 72 3rd Qu.:10.000
## A0JNW5_K645 : 2 P00558 : 60 Max. :10.000
## (Other) :17992 (Other):17384 NA's :9100
## Lys_target_label geneNamePrimary CADD_avg CADD_max
## Target : 230 AHNAK : 210 Min. : 0.01914 Min. : 0.059
## panReactive:15924 FLNA : 100 1st Qu.:23.48571 1st Qu.:25.600
## NA's : 1850 FLNB : 94 Median :25.34286 Median :28.400
## MYH9 : 84 Mean :25.23376 Mean :28.324
## PLEC : 72 3rd Qu.:27.27143 3rd Qu.:32.000
## PGK1 : 60 Max. :34.71429 Max. :40.000
## (Other):17384
## assembly
## hg19:9002
## hg38:9002
##
##
##
##
##
summary(Knot)
## index xref geneNamePrimary
## A0AVT1_K1003: 2 Q8WZ42 : 5886 TTN : 5886
## A0AVT1_K1006: 2 P20929 : 1656 NEB : 1656
## A0AVT1_K1011: 2 Q09666 : 1444 AHNAK : 1444
## A0AVT1_K1019: 2 Q8WXH0 : 1188 SYNE2 : 1188
## A0AVT1_K1020: 2 Q9UPN3 : 1080 MACF1 : 1080
## A0AVT1_K115 : 2 P46013 : 748 MKI67 : 748
## (Other) :290042 (Other):278052 (Other):278052
## CADD_avg CADD_max assembly
## Min. : 0.02229 Min. : 0.07 hg19:145027
## 1st Qu.:22.60000 1st Qu.:24.30 hg38:145027
## Median :24.45714 Median :26.90
## Mean :24.04361 Mean :26.88
## 3rd Qu.:26.48571 3rd Qu.:29.60
## Max. :36.28571 Max. :57.00
##
GROUP SPECIFIC VIOLIN PLOTS - removed – theme(legend.position=‘none’)
Credblue <- Cdet %>%
filter(Cys_react_threshold == "Low" | Cys_react_threshold == "Medium" | Cys_react_threshold == "High") %>%
ggplot(aes(x=Cys_react_threshold, y=CADD_avg, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Average missense CADD score for detected Cysteine codons") +
ylab("CADD score") +
xlab("Reactivity groups") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
Credblue
ggsave("Creact_avg_violin.png", width = 7, height=4, dpi= 300)
Cpan <- Cdet %>%
filter(Cys_target_label == "Target" | Cys_target_label == "panReactive") %>%
ggplot(aes(x=Cys_target_label, y=CADD_avg, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Average missense CADD score for detected Cysteine codons") +
ylab("CADD score") +
xlab("Target groups") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
Cpan
ggsave("Cpan_avg_violin.png", width = 7, height=4, dpi= 300)
Kredblue <- Kdet %>%
filter(Lys_react_threshold == "Low" | Lys_react_threshold == "Medium" | Lys_react_threshold == "High") %>%
ggplot(aes(x=Lys_react_threshold, y=CADD_avg, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Average missense CADD score for detected Lysine codons") +
ylab("CADD score") +
xlab("Reactivity group") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
Kredblue
ggsave("Kreact_avg_violin.png", width = 7, height=4, dpi= 300)
Kpan <- Kdet %>%
filter(Lys_target_label == "Target" | Lys_target_label == "panReactive") %>%
ggplot(aes(x=Lys_target_label, y=CADD_avg,fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Average missense CADD score for detected Lysine codons") +
ylab("CADD score") +
xlab("Target groups") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
Kpan
ggsave("Kpan_avg_violin.png", width = 7, height=4, dpi= 300)
caddCmerge <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/CADDspecific_Cdet_not_merge.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
))) %>%
mutate(group = factor(group, levels = c(
"Detected",
"Other"
)))
caddKmerge <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/CADDspecific_Kdet_not_merge.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
))) %>%
mutate(group = factor(group, levels = c(
"Detected",
"Other"
)))
summary(caddCmerge)
## pos_id19 pos_id38 xref
## 1_000949592_T_A: 2 1_001014212_T_A: 2 Q8WZ42 : 7182
## 1_000949592_T_C: 2 1_001014212_T_C: 2 Q04721 : 3402
## 1_000949592_T_G: 2 1_001014212_T_G: 2 P24043 : 2282
## 1_000949593_G_A: 2 1_001014213_G_A: 2 Q5T4S7 : 1736
## 1_000949593_G_C: 2 1_001014213_G_C: 2 Q15751 : 1694
## 1_000949593_G_T: 2 1_001014213_G_T: 2 Q8WXH0 : 1442
## (Other) :575100 (Other) :575100 (Other):557374
## matched_target lost_amino gained_amino Amino_acids
## False :489012 Cys:575112 Arg: 82158 Cys/Arg: 82158
## C129 : 392 Gly: 82158 Cys/Gly: 82158
## C39 : 364 Phe: 82160 Cys/Phe: 82160
## C91 : 336 Ser:164318 Cys/Ser:164318
## C106 : 308 Trp: 82158 Cys/Trp: 82158
## C111 : 308 Tyr: 82160 Cys/Tyr: 82160
## (Other): 84392
## geneNamePrimary CADD_score assembly group
## TTN : 7182 Min. : 0.001 hg19:287556 Detected: 86100
## NOTCH2 : 3402 1st Qu.:22.900 hg38:287556 Other :489012
## LAMA2 : 2282 Median :25.500
## UBR4 : 1736 Mean :24.905
## HERC1 : 1694 3rd Qu.:28.200
## : 1562 Max. :59.000
## (Other):557254
summary(caddKmerge)
## pos_id19 pos_id38 xref
## 1_000949382_A_C: 2 1_001014002_A_C: 2 Q8WZ42 : 41426
## 1_000949382_A_G: 2 1_001014002_A_G: 2 P20929 : 11606
## 1_000949383_A_C: 2 1_001014003_A_C: 2 Q09666 : 11578
## 1_000949383_A_G: 2 1_001014003_A_G: 2 Q8WXH0 : 8344
## 1_000949383_A_T: 2 1_001014003_A_T: 2 Q9UPN3 : 7630
## 1_000949384_G_C: 2 1_001014004_G_C: 2 P46013 : 5236
## (Other) :2156606 (Other) :2156606 (Other):2070798
## matched_target lost_amino gained_amino Amino_acids
## False :2030590 Lys:2156618 Arg:308090 Lys/Arg:308090
## K52 : 602 Asn:616176 Lys/Asn:616176
## K63 : 602 Gln:308086 Lys/Gln:308086
## K58 : 588 Glu:308086 Lys/Glu:308086
## K43 : 560 Ile:138844 Lys/Ile:138844
## K50 : 560 Met:169246 Lys/Met:169246
## (Other): 123116 Thr:308090 Lys/Thr:308090
## geneNamePrimary CADD_score assembly group
## TTN : 41426 Min. : 0.001 hg19:1078309 Detected: 126028
## NEB : 11606 1st Qu.:22.600 hg38:1078309 Other :2030590
## AHNAK : 11578 Median :24.200
## SYNE2 : 8344 Mean :24.112
## MACF1 : 7630 3rd Qu.:26.600
## : 5298 Max. :57.000
## (Other):2070736
names(caddCmerge)
## [1] "pos_id19" "pos_id38" "xref"
## [4] "matched_target" "lost_amino" "gained_amino"
## [7] "Amino_acids" "geneNamePrimary" "CADD_score"
## [10] "assembly" "group"
Cm <- caddCmerge %>%
filter(assembly == "hg19" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=group)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Detected vs other hg19 CADD scores for Cysteine") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,10,20,30,40,50,60))
Cm
ggsave("CADDspec19_Cmerge_violin.png", width = 7, height=4, dpi= 300)
Cm38 <- caddCmerge %>%
filter(assembly == "hg38" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=group)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Detected vs other hg38 CADD scores for Cysteine") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,10,20,30,40,50,60))
Cm38
ggsave("CADDspec38_Cmerge_violin.png", width = 7, height=4, dpi= 300)
Km <- caddKmerge %>%
filter(assembly == "hg19" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=group)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Detected vs other hg19 CADD scores for Lysine") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,10,20,30,40,50,60))
Km
ggsave("CADDspec19_Kmerge_violin.png", width = 7, height=4, dpi= 300)
Km38 <- caddKmerge %>%
filter(assembly == "hg38" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=group)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Detected vs other hg38 CADD scores for Lysine") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,10,20,30,40,50,60))
Km38
ggsave("CADDspec38_Kmerge_violin.png", width = 7, height=4, dpi= 300)
col detected : pos_id19,pos_id38,xref,matched_target,lost_amino,gained_amino,Amino_acids,pos_ID,Cys_reactivity,Cys_react_threshold,Cys_target_label,geneNamePrimary,CADD_score,assembly
col not detected : pos_id19,pos_id38,xref,matched_target,lost_amino,gained_amino,Amino_acids,pos_ID_falseCKtarget,geneNamePrimary,CADD_score,assembly
# detected c and k
cad <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/CADDspecific_C_detected_43050x2rows.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
))) %>%
mutate(Cys_react_threshold = factor(Cys_react_threshold, levels = c(
"Low",
"Medium",
"High"
))) %>%
mutate(Cys_target_label = factor(Cys_target_label, levels = c(
"Target",
"panReactive"
)))
kad <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/CADDspecific_K_detected_63014x2rows.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
))) %>%
mutate(Lys_react_threshold = factor(Lys_react_threshold, levels = c(
"Low",
"Medium",
"High"
))) %>%
mutate(Lys_target_label = factor(Lys_target_label, levels = c(
"Target",
"panReactive"
)))
# not detected c and k
cadnot <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/CADDspecific_C_notdetected_244506x2rows.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
)))
kadnot <- read.csv("/Users/mariapalafox/Box\ Sync/CODE_DATA/dir_MAPpaper/CADDmapped/BUG_FIX/CADDspecific_K_notdetected_1015295x2rows.csv") %>%
mutate(assembly = factor(assembly, levels = c(
"hg19",
"hg38"
)))
summary(cad)
## pos_id19 pos_id38 xref
## 1_000949592_T_A: 2 1_001014212_T_A: 2 P21333 : 350
## 1_000949592_T_C: 2 1_001014212_T_C: 2 P49327 : 350
## 1_000949592_T_G: 2 1_001014212_T_G: 2 Q15149 : 308
## 1_000949593_G_A: 2 1_001014213_G_A: 2 P78527 : 280
## 1_000949593_G_C: 2 1_001014213_G_C: 2 Q7Z6Z7 : 280
## 1_000949593_G_T: 2 1_001014213_G_T: 2 O75369 : 252
## (Other) :86088 (Other) :86088 (Other):84280
## matched_target lost_amino gained_amino Amino_acids
## C129 : 392 Cys:86100 Arg:12300 Cys/Arg:12300
## C39 : 364 Gly:12300 Cys/Gly:12300
## C91 : 336 Phe:12300 Cys/Phe:12300
## C106 : 308 Ser:24600 Cys/Ser:24600
## C111 : 308 Trp:12300 Cys/Trp:12300
## C100 : 294 Tyr:12300 Cys/Tyr:12300
## (Other):84098
## pos_ID Cys_reactivity Cys_react_threshold
## A0AVT1_C347: 14 Min. : 0.74 Low :13006
## A0AVT1_C433: 14 1st Qu.: 4.32 Medium: 6174
## A0AVT1_C546: 14 Median : 5.58 High : 1316
## A0AVT1_C625: 14 Mean : 5.93 NA's :65604
## A0AVT1_C721: 14 3rd Qu.: 6.83
## A0AVT1_C770: 14 Max. :20.00
## (Other) :86016 NA's :65604
## Cys_target_label geneNamePrimary CADD_score assembly
## Target : 9968 FASN : 350 Min. : 0.001 hg19:43050
## panReactive:71750 FLNA : 350 1st Qu.:22.200 hg38:43050
## NA's : 4382 PLEC : 308 Median :24.400
## HUWE1 : 280 Mean :23.911
## PRKDC : 280 3rd Qu.:27.500
## DYNC1H1: 252 Max. :59.000
## (Other):84280
summary(kad)
## pos_id19 pos_id38 xref
## 1_000949463_A_C: 2 1_001014083_A_C: 2 Q09666 : 1470
## 1_000949463_A_G: 2 1_001014083_A_G: 2 P21333 : 700
## 1_000949464_A_C: 2 1_001014084_A_C: 2 O75369 : 658
## 1_000949464_A_G: 2 1_001014084_A_G: 2 P35579 : 588
## 1_000949464_A_T: 2 1_001014084_A_T: 2 Q15149 : 504
## 1_000949465_G_C: 2 1_001014085_G_C: 2 P00558 : 420
## (Other) :126016 (Other) :126016 (Other):121688
## matched_target lost_amino gained_amino Amino_acids
## K52 : 602 Lys:126028 Arg:18004 Lys/Arg:18004
## K63 : 602 Asn:36008 Lys/Asn:36008
## K58 : 588 Gln:18004 Lys/Gln:18004
## K43 : 560 Glu:18004 Lys/Glu:18004
## K50 : 560 Ile: 7252 Lys/Ile: 7252
## K68 : 560 Met:10752 Lys/Met:10752
## (Other):122556 Thr:18004 Lys/Thr:18004
## pos_ID Lys_reactivity Lys_react_threshold
## A0AVT1_K1014: 14 Min. : 0.10 Low :48818
## A0AVT1_K409 : 14 1st Qu.: 5.80 Medium: 9254
## A0AVT1_K544 : 14 Median :10.00 High : 4256
## A0AVT1_K86 : 14 Mean : 7.92 NA's :63700
## A0AVT1_K963 : 14 3rd Qu.:10.00
## A0JNW5_K645 : 14 Max. :10.00
## (Other) :125944 NA's :63700
## Lys_target_label geneNamePrimary CADD_score assembly
## Target : 1610 AHNAK : 1470 Min. : 0.001 hg19:63014
## panReactive:111468 FLNA : 700 1st Qu.:23.100 hg38:63014
## NA's : 12950 FLNB : 658 Median :24.800
## MYH9 : 588 Mean :25.234
## PLEC : 504 3rd Qu.:27.600
## PGK1 : 420 Max. :40.000
## (Other):121688
names(cad)
## [1] "pos_id19" "pos_id38" "xref"
## [4] "matched_target" "lost_amino" "gained_amino"
## [7] "Amino_acids" "pos_ID" "Cys_reactivity"
## [10] "Cys_react_threshold" "Cys_target_label" "geneNamePrimary"
## [13] "CADD_score" "assembly"
names(kad)
## [1] "pos_id19" "pos_id38" "xref"
## [4] "matched_target" "lost_amino" "gained_amino"
## [7] "Amino_acids" "pos_ID" "Lys_reactivity"
## [10] "Lys_react_threshold" "Lys_target_label" "geneNamePrimary"
## [13] "CADD_score" "assembly"
CChigh <- cad %>%
filter(Cys_react_threshold == "High" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Missense CADD scores for highly reactive Cysteines") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
CChigh
ggsave("CADDmiss_CHIGH_violin.png", width = 7, height=4, dpi= 300)
CCmed <- cad %>%
filter(Cys_react_threshold == "Medium" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Missense CADD scores for medium reactive Cysteines") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
CCmed
ggsave("CADDmiss_CMEDIUM_violin.png", width = 7, height=4, dpi= 300)
CClow <- cad %>%
filter(Cys_react_threshold == "Low" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Missense CADD scores for low reactive Cysteines") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,10,20,30,40,50,60,70))
CClow
ggsave("CADDmiss_CLOW_violin.png", width = 7, height=4, dpi= 300)
KKhigh <- kad %>%
filter(Lys_react_threshold == "High" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Missense CADD scores for highly reactive Lysines") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35,40))
KKhigh
ggsave("CADDmiss_KHIGH_violin.png", width = 7, height=4, dpi= 300)
KKmed <- kad %>%
filter(Lys_react_threshold == "Medium" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Missense CADD scores for medium reactive Lysines") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
KKmed
ggsave("CADDmiss_KMEDIUM_violin.png", width = 7, height=4, dpi= 300)
KKlow <- kad %>%
filter(Lys_react_threshold == "Low" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Missense CADD scores for low reactive Lysines") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35,40))
KKlow
ggsave("CADDmiss_KLOW_violin.png", width = 7, height=4, dpi= 300)
CCpan <- cad %>%
filter(Cys_target_label == "panReactive" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Missense CADD scores for panReactive Cysteines") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,10,20,30,40,50,60,70))
CCpan
ggsave("CADDmiss_CPAN_violin.png", width = 7, height=4, dpi= 300)
CCtarget <- cad %>%
filter(Cys_target_label == "Target" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Missense CADD scores for target Cysteines") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
CCtarget
ggsave("CADDmiss_CTARGET_violin.png", width = 7, height=4, dpi= 300)
KKpan <- kad %>%
filter(Lys_target_label == "panReactive" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Missense CADD scores for panReactive Lysines") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35,40))
KKpan
ggsave("CADDmiss_KPAN_violin.png", width = 7, height=4, dpi= 300)
KKtarget <- kad %>%
filter(Lys_target_label == "Target" ) %>%
ggplot(aes(x=Amino_acids, y=CADD_score, fill=assembly)) + geom_violin(alpha=1/3, trim=TRUE, draw_quantiles = c(0.25, 0.5, 0.75)) + theme_classic() + scale_fill_grey() +
ggtitle("Missense CADD scores for target Lysines") +
ylab("CADD score") +
xlab("Missense amino acids") +
theme(plot.title = element_text(hjust=0.5), axis.title.x = element_text(size=15, color="black", margin=margin(t=15, b=5)),axis.title.y = element_text(size=15, color="black", margin=margin(t=0, r=15, b=0)), axis.text=element_text(size=12, color="black",margin=margin(t=15, b=5))) +
scale_y_continuous(breaks = c(0,5,10,15,20,25,30,35))
KKtarget
ggsave("CADDmiss_KTARGET_violin.png", width = 7, height=4, dpi= 300)